{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 387,
   "id": "de02baff",
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "import pandas as pd\n",
    "from scipy import stats\n",
    "\n",
    "from sklearn.model_selection import train_test_split\n",
    "\n",
    "import matplotlib.pyplot as plt\n",
    "plt.style.use('fivethirtyeight')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "702ababf",
   "metadata": {},
   "outputs": [],
   "source": [
    "COLORS = [\n",
    "    '#00B0F0',\n",
    "    '#FF0000'\n",
    "]"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "4d417229",
   "metadata": {},
   "source": [
    "# Additional code for data generating processes"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "ac23d7d9",
   "metadata": {},
   "source": [
    "## Chapter 09"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "7058fff7",
   "metadata": {},
   "source": [
    "### Post-training earnings data (simple)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 39,
   "id": "865c3739",
   "metadata": {},
   "outputs": [],
   "source": [
    "SAMPLE_SIZE = 200\n",
    "MAX_AGE = 50\n",
    "\n",
    "age = stats.halfnorm.rvs(loc=19, scale=10, size=SAMPLE_SIZE).astype(int)\n",
    "age = np.where(age > MAX_AGE, np.random.choice(np.arange(20, MAX_AGE)), age)\n",
    "\n",
    "took_a_course = stats.bernoulli(p=10/age).rvs().astype(bool)\n",
    "\n",
    "earnings = 75000 + took_a_course * 10000 + age * 1000 + age**2 * 50 + np.random.randn(SAMPLE_SIZE) * 2000\n",
    "earnings = earnings.round()\n",
    "\n",
    "earnings = pd.DataFrame(dict(\n",
    "    age=age,\n",
    "    took_a_course=took_a_course,\n",
    "    earnings=earnings\n",
    "))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 40,
   "id": "ca46d6ce",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "<matplotlib.collections.PathCollection at 0x1c786e7e0d0>"
      ]
     },
     "execution_count": 40,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAb4AAAEJCAYAAADrQkIkAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjUuMCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8/fFQqAAAACXBIWXMAAAsTAAALEwEAmpwYAABB4ElEQVR4nO3de1iU95n4/zcOiCNyUBgEgmBIcArEfmmy6ylbI2BVjDmYkGJ/bex6adAQf6Z+F0XiIbXLRk03ifmpMa7Sba80BxdqIm6I7maBRC2K1iIWXSSXKQJBEBRkkOMwvz8sow8zDzKcmblf15XrivfcDM9nHvTm83k+B6e6ujoTQgghhIMYNdQXIIQQQgwmKXxCCCEcihQ+IYQQDkUKnxBCCIcihU8IIYRDkcInhBDCoUjhE0II4VCk8AkhhHAoUviGQElJyVBfwqCRttonR2mro7QTHKutUviEEEI4FCl8QgghHIoUPiGEEA7lvoXv7bffJioqikmTJvHQQw8RHx/PxYsXVfNfffVVvLy82LVrlyLe0tLCunXrCAkJISAggCVLllBRUaHIqaurIyEhgaCgIIKCgkhISKCurk6RU1ZWRnx8PAEBAYSEhLB+/XpaW1sVOUVFRSxcuBA/Pz/CwsLYsWMHJpPsxS2EEKIHhe/EiRMsX76cY8eOkZmZibOzM88++yw3b960yD18+DDnzp3D39/f4rWUlBSOHDlCWloaWVlZNDQ0EB8fj9FoNOesWLGCwsJC0tPTycjIoLCwkJUrV5pfNxqNxMfHYzAYyMrKIi0tjczMTDZu3GjOuXXrFosXL8bX15fs7Gy2b9/Orl272L17t80fjhBCCPvjfL+EQ4cOKf68b98+goKCOHXqFLGxseb41atX2bBhA5999hlxcXGKr6mvr+eDDz5gz549REVFmd9n6tSp5ObmEhMTQ3FxMV9++SVHjx5l+vTpALzzzjvExsZSUlJCaGgo2dnZXLp0iQsXLhAYGAjA1q1bWbNmDZs3b8bDw4P09HSamprYu3cvWq2W8PBwLl++zHvvvcfq1atxcnLq2ycmhBBiwJQ2tJF6roHK20b8x2rY9Kg7we4u/fo9bH7GZzAY6OjowMvLyxxrb29nxYoVJCUlodfrLb6moKCAtrY2oqOjzbHAwED0ej2nT58GID8/n3HjxpmLHsCMGTNwc3NT5Oj1enPRA4iJiaGlpYWCggJzzsyZM9FqtYqcyspKSktLbW2uEEKIQVLa0Mazx2pJv9LEiWutpF9p4tljtZQ2tPXr97lvj6+rDRs2MHXqVKZNm2aObdu2jfHjx7N8+XKrX1NdXY1Go8Hb21sR1+l0VFdXm3O8vb0VPTInJyd8fHwUOTqdTvEe3t7eaDQaRU5AQIDF9+l8bfLkyVavcbDXsDjSmhlpq31ylLY6Sjth6Nu6udiFbxuUvbtvG4wkf1XBP+ttK36hoaGqr9lU+F577TVOnTrF0aNH0Wg0wJ1ngB999BHHjx+36aIATCaTRaHrTU7XeNeczokt3Q1zdvch9bfOoVtHIG21T47SVkdpJwyPthq+uQ60WsQbNW6Ehuosv6CXejzUmZKSwh/+8AcyMzMVvabjx49z7do19Ho93t7eeHt7U1ZWxuuvv054eDgAvr6+GI1GamtrFe9ZU1Nj7o35+vpSU1OjmH1pMpmora1V5HT27DrV1tZiNBq7zampqQGw6C0KIYQYPvzHaqzG/VTivdWjwpecnExGRgaZmZlMmTJF8dqKFSs4efIkx48fN//n7+9PYmIihw8fBiAyMhIXFxdycnLMX1dRUUFxcbH5md60adMwGAzk5+ebc/Lz82lsbFTkFBcXK5ZB5OTk4OrqSmRkpDknLy+P5uZmRY6/vz/BwcG2fDZCCCEG0aZH3XnQXVnkHnS/M8GlP913qDMpKYmDBw/y+9//Hi8vL6qqqgBwc3Nj3Lhx6HQ6i56Us7MzEydONHebPT09efHFF9myZQs6nY7x48ezceNGIiIimDNnDgB6vZ65c+eydu1a3n33XUwmE2vXrmX+/Pnm94mOjiYsLIxVq1aRmprKzZs32bJlC0uXLsXDwwOAuLg4duzYQWJiIklJSXzzzTfs3LmT9evXy4xOIYQYxoLdXfhsvjep5xq4dtuI3wDN6rxv4Ttw4AAAzzzzjCKenJxMSkpKj7/RG2+8gUajYdmyZTQ3NzN79mzef/9987NCgP3795OcnMxzzz0HQGxsLG+++ab5dY1Gw8GDB0lKSmLBggWMGTOGuLg4UlNTzTmenp58+umnJCUlERUVhZeXF6+88gqrV6/u8bUKIYQYGsHuLux/YsKAfg+nuro62dJkkA2Hh8iDRdpqnxylrY7STnCstspenUIIIRyKzev4hBBC2I/OnVKu1LgS8t2NAXmmNtxI4RNCCAfVuVPKtw1GQMOfbjVx9norn833tuviJ0OdQgjhoFLPNfyt6N31bYOR1HMNQ3RFg0MKnxBCOKjK20ar8WsqcXshhU8IIRzUYO2UMtxI4RNCCAc1WDulDDdS+IQQwkF17pTyQoiWxzyNvBCitfuJLSCzOoUQwqF17pRSUlJLaGjQUF/OoJAenxBCCIcihU8IIYRDkcInhBDCoUjhE0II4VCk8AkhhHAoUviEEEI4FCl8QgghHIoUPiGEEA5FCp8QQgiHIoVPCCGEQ5HCJ4QQwqFI4RNCCOFQ7lv43n77baKiopg0aRIPPfQQ8fHxXLx40fx6W1sbr7/+OrNmzSIgIAC9Xs+KFSsoKytTvE9LSwvr1q0jJCSEgIAAlixZQkVFhSKnrq6OhIQEgoKCCAoKIiEhgbq6OkVOWVkZ8fHxBAQEEBISwvr162ltbVXkFBUVsXDhQvz8/AgLC2PHjh2YTCZbPxshhBB26L6F78SJEyxfvpxjx46RmZmJs7Mzzz77LDdv3gTg9u3bnD9/nqSkJL766is++ugjKioqiIuLo7293fw+KSkpHDlyhLS0NLKysmhoaCA+Ph6j8e5JvytWrKCwsJD09HQyMjIoLCxk5cqV5teNRiPx8fEYDAaysrJIS0sjMzOTjRs3mnNu3brF4sWL8fX1JTs7m+3bt7Nr1y52797dLx+YEEKIkc2prq7Opq6QwWAgKCiIDz/8kNjYWKs5//u//8uMGTM4efIkERER1NfX8/DDD7Nnzx5+/OMfA1BeXs7UqVPJyMggJiaG4uJipk+fztGjR5kxYwYAeXl5xMbGcubMGUJDQ/nv//5vfvzjH3PhwgUCAwMBOHjwIGvWrKGkpAQPDw/S0tL45S9/yeXLl9FqtQD8+te/5je/+Q0XL17Eycmp1x9WfykpKSE0NHSoL2NQSFvtk6O01VHaCY7VVpuf8RkMBjo6OvDy8lLNaWhoADDnFBQU0NbWRnR0tDknMDAQvV7P6dOnAcjPz2fcuHFMnz7dnDNjxgzc3NwUOXq93lz0AGJiYmhpaaGgoMCcM3PmTHPR68yprKyktLTU1uYKIYSwMzYfRLthwwamTp3KtGnTrL7e2trKpk2bWLBgAQ888AAA1dXVaDQavL29Fbk6nY7q6mpzjre3t6JH5uTkhI+PjyJHp9Mp3sPb2xuNRqPICQgIsPg+na9NnjzZ6nWXlJT0pPn9ZrC/31CSttonR2mro7QT7Kut3fVebSp8r732GqdOneLo0aNoNBqL19vb20lISKC+vp6PP/74vu9nMpksCl1vcrrGu+Z0TmzpbphzMLv4jjSkIG21T47SVkdpJzhWW3s81JmSksIf/vAHMjMzrfaa2tvbWb58OUVFRRw+fJgJEyaYX/P19cVoNFJbW6v4mpqaGnNvzNfXl5qaGsXsS5PJRG1trSKns2fXqba2FqPR2G1OTU0NgEVvUQghhOPpUeFLTk4mIyODzMxMpkyZYvF6W1sby5Yto6ioiCNHjjBx4kTF65GRkbi4uJCTk2OOVVRUmCe0AEybNg2DwUB+fr45Jz8/n8bGRkVOcXGxYhlETk4Orq6uREZGmnPy8vJobm5W5Pj7+xMcHNyT5gohhLBj9y18SUlJfPTRRxw4cAAvLy+qqqqoqqrCYDAAd3p6P//5zzl79iwHDhzAycnJnNPU1ASAp6cnL774Ilu2bCE3N5fz58+zcuVKIiIimDNnDgB6vZ65c+eydu1azpw5Q35+PmvXrmX+/Pnm7nd0dDRhYWGsWrWK8+fPk5uby5YtW1i6dCkeHh4AxMXFodVqSUxM5OLFi2RmZrJz504SExOHxYxOIYQQQ+u+z/gOHDgAwDPPPKOIJycnk5KSQkVFBVlZWQDmItZpz549/PSnPwXgjTfeQKPRsGzZMpqbm5k9ezbvv/++4lnh/v37SU5O5rnnngMgNjaWN9980/y6RqPh4MGDJCUlsWDBAsaMGUNcXBypqanmHE9PTz799FOSkpKIiorCy8uLV155hdWrV9vyuQghhLBTNq/jE33nSA+Rpa32yVHaOlDtLG1oI/VcA5W3jfiP1bDpUXeC3V36/fvYwlHuKfRiOYMQQojeK21o49ljtXzbcHfXqrPXW/lsvveQFz9HIZtUCyHEIEo916AoegDfNhhJPdcwRFfkeKTwCSHEIKq8bbQav6YSF/1PCp8QQgwi/7GWm38A+KnERf+TwieEEINo06PuPOiuLHIPut+Z4CIGh0xuEUKIQRTs7sJn871JPdfAtdtG/IbJrE5HIoVPCCEGWbC7C/ufmHD/xL8ZjssfRjIpfEIIMYzJ8of+J8/4hBBiGJPlD/1PCp8QQgxjsvyh/8lQpxBC9NFAPoOT5Q/9TwqfEEL0wUA/g9v0qDtnr7cq3l+WP/SNDHUKIUQfDPQzuM7lDy+EaPmh32heCNHKxJY+kh6fEEL0wWA8g7N1+YPonvT4hBCiD+QZ3MgjhU8IIfpAtiAbeWSoUwgh+qA3W5DJTixDSwqfEEL0kS3P4GQnlqEnQ51CCDGIZCeWoSeFTwghBpHsxDL0ZKhTCCH6yJZndjILdOjdt8f39ttvExUVxaRJk3jooYeIj4/n4sWLihyTycS2bdv43ve+h5+fH08++SSXLl1S5LS0tLBu3TpCQkIICAhgyZIlVFRUKHLq6upISEggKCiIoKAgEhISqKurU+SUlZURHx9PQEAAISEhrF+/ntbWVkVOUVERCxcuxM/Pj7CwMHbs2IHJZLLlcxFCiB7pfGaXfqWJE9daSb/SxLPHailtaLOa/49TtDg7KWPOTnfiYnDct/CdOHGC5cuXc+zYMTIzM3F2dubZZ5/l5s2b5px3332XPXv2sGPHDrKzs9HpdCxevJiGhrtj1ikpKRw5coS0tDSysrJoaGggPj4eo/Fu937FihUUFhaSnp5ORkYGhYWFrFy50vy60WgkPj4eg8FAVlYWaWlpZGZmsnHjRnPOrVu3WLx4Mb6+vmRnZ7N9+3Z27drF7t27+/xhCSFEV7Y+s/vt5Sbau/we3m66ExeD475DnYcOHVL8ed++fQQFBXHq1CliY2MxmUzs3buXX/ziFzzzzDMA7N27l9DQUDIyMli2bBn19fV88MEH7Nmzh6ioKPP7TJ06ldzcXGJiYiguLubLL7/k6NGjTJ8+HYB33nmH2NhYSkpKCA0NJTs7m0uXLnHhwgUCAwMB2Lp1K2vWrGHz5s14eHiQnp5OU1MTe/fuRavVEh4ezuXLl3nvvfdYvXo1Tk5dftUSQog+sPWZnTzjG3o2T24xGAx0dHTg5eUFQGlpKVVVVURHR5tztFots2bN4vTp0wAUFBTQ1tamyAkMDESv15tz8vPzGTdunLnoAcyYMQM3NzdFjl6vNxc9gJiYGFpaWigoKDDnzJw5E61Wq8iprKyktLTU1uYKIUS3bH1mJ8/4hp7NhW/Dhg1MnTqVadOmAVBVVQWATqdT5Ol0OqqrqwGorq5Go9Hg7e3dbY63t7eiR+bk5ISPj48ip+v38fb2RqPRdJvT+efOHCGE6C+27twiO70MPZtmdb722mucOnWKo0ePotEob1zXIUSTyXTfYcWuOdbye5LTNW7tWrr7WoCSkpJur7W/Dfb3G0rSVvvkKG3tSTvfmeLE+1edud46Ct3oDlYFNdF6rYGSa/2TP1js6Z6GhoaqvtbjwpeSksKhQ4c4cuQIkydPNscnTpwI3OlN3TsEWVNTY+5p+fr6YjQaqa2txcfHR5Eza9Ysc05NTY2i0JlMJmpraxXv0zns2am2thaj0ajI6dqzq6mpASx7pffq7kPqb53PLB2BtNU+OUpbe9rOUGDO93v+vrbmDwZHuafQw6HO5ORkMjIyyMzMZMqUKYrXgoODmThxIjk5OeZYc3MzeXl55ud1kZGRuLi4KHIqKiooLi4250ybNg2DwUB+fr45Jz8/n8bGRkVOcXGxYhlETk4Orq6uREZGmnPy8vJobm5W5Pj7+xMcHNyjD0UIIYT9um/hS0pK4qOPPuLAgQN4eXlRVVVFVVUVBoMBuDN8+PLLL7Nz504yMzO5ePEiiYmJuLm5ERcXB4CnpycvvvgiW7ZsITc3l/Pnz7Ny5UoiIiKYM2cOAHq9nrlz57J27VrOnDlDfn4+a9euZf78+ebfQqKjowkLC2PVqlWcP3+e3NxctmzZwtKlS/Hw8AAgLi4OrVZLYmIiFy9eJDMzk507d5KYmCgzOoUQQtx/qPPAgQMA5qUKnZKTk0lJSQHg1VdfpampiXXr1lFXV8djjz3GoUOHcHe/+7D2jTfeQKPRsGzZMpqbm5k9ezbvv/++4lnh/v37SU5O5rnnngMgNjaWN9980/y6RqPh4MGDJCUlsWDBAsaMGUNcXBypqanmHE9PTz799FOSkpKIiorCy8uLV155hdWrV/fm8xFCCGFnnOrq6mRLk0HmSGPp0lb75ChtdZR2gmO1VTapFkII4VCk8AkhhHAoUviEEEI4FCl8QgghHIoUPiGEEA5FDqIVQjgEWw6LFfZNCp8Qwu51HhZ777l5Z6+38tl8byl+DkiGOoUQds/Ww2KFfZPCJ4Swe3L4q7iXFD4hhN2Tw1/FvaTwCSHsnhz+Ku4lk1uEEMPCQM66DHZ34bP53qSea+DabSN+MqvToUnhE0IMucGYdRns7sL+Jyb0y3uJkU2GOoUQQ643sy5LG9p46asbLPriOi99dYPShraBvkxhJ6THJ4QYcrbOuixtaOPJrOuU3757qlretWY+X6hT7SGerGzi5RP11LV04OU6ir3/4Mnj/tq+X7wYcaTHJ4QYcrbOutxwql5R9ADKb5vYcKreav7JyiaeOXaDqwYjt9pMXDUYeebYDU5WNvXtwsWIJIVPCDHkbJ11eaq6xab4yyfqae9y5Ha76U5cOB4Z6hRCDDlbZ13ebrf+PmrxupYOq/F6lbiwb1L4hBDDgi2zLt2cnWhpNVnGXZys5nu5juJWm+XzQk9XGfRyRHLXhRAjzoyJo63Hfa3H9/6DJ85daqKz0524cDxS+IQQw4ItyxO2Tfck0E35z1eg2yi2TbdeyB7313J4/gSCxmnwdHEiaJyGw/MnyKxOByVDnUKIIWfrAvZgdxc+j/WxaSeWx/21FL4ghU70sMd38uRJlixZQlhYGF5eXnz44YeK1w0GA+vWrSM8PBw/Pz/+7u/+jj179ihyWlpaWLduHSEhIQQEBLBkyRIqKioUOXV1dSQkJBAUFERQUBAJCQnU1dUpcsrKyoiPjycgIICQkBDWr19Pa2urIqeoqIiFCxfi5+dHWFgYO3bswGSyfB4ghLBNZ69sVaFrvy4a780C9s5ngkdidex/YoJsPyZ6rEeFr7GxkfDwcLZv345Wa/kb08aNG/mv//ov3n//fU6fPs0//dM/sXXrVj755BNzTkpKCkeOHCEtLY2srCwaGhqIj4/HaLz7w75ixQoKCwtJT08nIyODwsJCVq5caX7daDQSHx+PwWAgKyuLtLQ0MjMz2bhxoznn1q1bLF68GF9fX7Kzs9m+fTu7du1i9+7dvfqAhBB3dC4aT7/SxJ9uaUi/0sSTWdf7pfjJsUFiMPVoqHPevHnMmzcPgMTERIvX8/PziY+PZ/bs2QAEBwfzwQcf8Kc//YklS5ZQX1/PBx98wJ49e4iKigJg3759TJ06ldzcXGJiYiguLubLL7/k6NGjTJ8+HYB33nmH2NhYSkpKCA0NJTs7m0uXLnHhwgUCAwMB2Lp1K2vWrGHz5s14eHiQnp5OU1MTe/fuRavVEh4ezuXLl3nvvfdYvXo1Tk7WZ30J4Yhs2Ri6u0XjH//Ip0/XIccGicHUL5NbZsyYwdGjRykvLwfg9OnT/OUvfyEmJgaAgoIC2traiI6ONn9NYGAger2e06dPA3eK57hx48xFr/N93dzcFDl6vd5c9ABiYmJoaWmhoKDAnDNz5kxFzzQmJobKykpKS0v7o7lC2IV7e3AnrrXetwd3tsa2uC3k2CAxmPplcsuOHTtYu3YtjzzyCM7Od97yzTffZMGCBQBUV1ej0Wjw9vZWfJ1Op6O6utqc4+3treiROTk54ePjo8jR6XSK9/D29kaj0ShyAgICLL5P52uTJ0+22oaSkpLeNL3XBvv7DSVp6/D0T0UulN9W9u7Kb5v4f/+ngrciLIuZsX0M1n5XNra390u735nixPtXnbneOgrd6A5WBTXReq2Bkmt9fus+GUn3tK/sqa2hoaGqr/VL4du3bx+nT5/m448/ZtKkSfzxj39k8+bNBAUFMXfuXNWvM5lMFoWuNzld411zOie2dDfM2d2H1N86h24dgbR1+Lp0thKw3LnkUpMroaGTLeIzSmvIKrPcEmyGv5bQ0El9vp5QYM73+/w2/Wqk3dO+cKS29rnwNTU18atf/Yrf/va3xMbGAvDII49w4cIFdu3axdy5c/H19cVoNFJbW4uPz91nATU1NcyaNQsAX19fampqFIXOZDJRW1tr7rH5+vqahz071dbWYjQaFTmdvb97vw9g0VsUwrGpzXS2Ht823ZPCGzWUN94tlt2tnRvIg2WF6Is+P+Nra2ujra0NjUY5Pq/RaOjouPMXJDIyEhcXF3JycsyvV1RUUFxcbH6mN23aNAwGA/n5+eac/Px8GhsbFTnFxcWKZRA5OTm4uroSGRlpzsnLy6O5uVmR4+/vT3BwcF+bK4Td+Hud9V1O1OKda+deCNHymKeRF0K0fB7rY7WYda7Lu/f54bPHauXMPDEs9KjwGQwGCgsLKSwspKOjg/LycgoLCykrK8PDw4PHH3+crVu3cvz4cf7617/y4Ycf8sknn7Bo0SIAPD09efHFF9myZQu5ubmcP3+elStXEhERwZw5cwDQ6/XMnTuXtWvXcubMGfLz81m7di3z5883d7+jo6MJCwtj1apVnD9/ntzcXLZs2cLSpUvx8PAAIC4uDq1WS2JiIhcvXiQzM5OdO3eSmJgoMzqFuIetu5/A3bVz709t6XbtXG/W5QkxWJzq6uruu7L7+PHjPPXUUxbxn/zkJ+zdu5eqqiq2bt1KTk4ON2/eZNKkSSxdulSxfKC5uZnNmzeTkZFBc3Mzs2fP5q233lLM0Lx58ybJycl88cUXAMTGxvLmm2/i5eVlzikrKyMpKYmvv/6aMWPGEBcXR2pqKq6uruacoqIikpKSOHfuHF5eXixbtozk5ORhU/gcaSxd2jq8dQ5H9nT3k878KzUGQnzGqeYv+uI6J661WsR/6DeaI7Ej55HDSLynveVIbe1R4RP9y5F+wKSt9sPatmIPumusbiv2/3xpfSLMwkmufDS3b2v+BpO939N7OVJbZZNqIUSP2DJ8qbZDoOwcKIYDKXxCiB6xZVux6mbruddV4kIMJil8Qogece96oN3fjLMSv95svWtXrRIXYjDJsURCDGPDaS2c2twwa3Ff7SiuGix7dxO18ru2GHpS+IQYpmw9o26g3Wqz3ltrsBJ/0N2Zs9ct1+xNdpd/csTQk1+/hBimerMWzpZTzG1lywkKsum0GM7k1y8hhilbz6grbWgj5kg1NfesIsipaOJ/nvLtlx7iP07R8um3TbTf08FzdroT7yrY3YXP5nvbtEZQiMEihU+IYcrWM+oSvrqhKHoANS134scWTezz9fz2srLoAbSb7sQf97de/PY/MaHP31eI/iZDnUIMU7YOF+Zfb7cpbqsrt6wPm36rEhdiuJIenxDDlK3DhbadtXCHLbNGZYmCsBdS+IQYAXpSWpxU8tR2qLV11qgsURD2Qn5ihRimbD3aZ5rO+u+xanFbZ40+qLIUQZYoiJFGCp8Qw5SthenfnpiAj6sy5uN6J26NrbNGZYmCsBfyq5oQw5SthSnY3YX/ecq3x88EnVUGUDUq8XufOV6pNRDirX4skRDDmRQ+IYYpW5czgG1LCIrrrRdQtfi9719SUktoaFCPvo8Qw40MdQoxTA300GJDW4dNcSHshfT4hBhEtiwf6M3uJycrm3j5RD11LR14uY5i7z94Wl1cDtChMg+0Q3UeqBD2QQqfEH3UWcyu1LgS8t0N1eLUm02nbRm6PFnZxDPHbph3V7nVZuSZYzc4PH+C1eIX4j6Kv9y0HNZ8yF0GgoR9k59wIfrg3iUHf7ql6XbJQW82nbbFyyfqrW4p9vKJeqv5YeNHW41/TyUuhL2QwidEH9hSzGydpWmrGyqnm6vF/3GKlq5nyKptOi2EPZHCJ0Qf2FLMejNL05ZjhtSezanFu9t0Wgh71qPCd/LkSZYsWUJYWBheXl58+OGHFjnffPMNP/vZzwgKCsLf35/Zs2dTXFxsfr2lpYV169YREhJCQEAAS5YsoaKiQvEedXV1JCQkEBQURFBQEAkJCdTV1SlyysrKiI+PJyAggJCQENavX09ra6sip6ioiIULF+Ln50dYWBg7duzAZJL9BEX/G8gz6mzduSVE5dmc2jO7ge6BCjFc9ajwNTY2Eh4ezvbt29FqLYdB/vrXvzJ//nyCg4PJzMwkLy+PTZs24ebmZs5JSUnhyJEjpKWlkZWVRUNDA/Hx8RiNd/+SrVixgsLCQtLT08nIyKCwsJCVK1eaXzcajcTHx2MwGMjKyiItLY3MzEw2btxozrl16xaLFy/G19eX7Oxstm/fzq5du9i9e3evPiAhumPLcGHnLM0XQrT80G80L4Rou53YYuszQVuf2fWmByqEPejRrM558+Yxb948ABITEy1eT01NJTo6mn/5l38xxyZPnmz+//r6ej744AP27NlDVFQUAPv27WPq1Knk5uYSExNDcXExX375JUePHmX69OkAvPPOO8TGxlJSUkJoaCjZ2dlcunSJCxcuEBgYCMDWrVtZs2YNmzdvxsPDg/T0dJqamti7dy9arZbw8HAuX77Me++9x+rVq3Fykqnaov8M5Bl1th4DtOlRd/KqWihvvLsOL9BtlGqPctOj7py93qoorrIFmXAEfX7G19HRwdGjR9Hr9Tz//PM89NBDREVFcejQIXNOQUEBbW1tREdHm2OBgYHo9XpOnz4NQH5+PuPGjTMXPYAZM2bg5uamyNHr9eaiBxATE0NLSwsFBQXmnJkzZyp6pjExMVRWVlJaWtrX5gqhMJBn1PXqGKCuQ/rdDPHb2gMVwl70eR3f9evXMRgMvP3227z22mu8/vrrfP3117z00kuMHTuWBQsWUF1djUajwdvbW/G1Op2O6upqAKqrq/H29lb0yJycnPDx8VHk6HQ6xXt4e3uj0WgUOQEBARbfp/O1e3ui9yopKen9h9ALg/39hpI9t/W7BlfAcmiwoqGlz+32wPp7e9Jq9b03F7tQfltZtMpvm0j+qoJ/1qsX4vX3/HVpvVZLybWeXZ8939d7OUo7wb7aGhoaqvpanwtfR8edYZWFCxeyevVqAL7//e9TUFDAgQMHWLBggerXmkwmi0LXm5yu8a45nRNbuhvm7O5D6m+dQ7eOwN7b+kBxNZXXLYtKoMeYPu9lGfbdDf5isJxh+T3fcVbf2/DNdaDVIt6ocSM0VGcR7wt7v6+dHKWd4Fht7fNQp7e3N87Ozuj1ekV8ypQplJeXA+Dr64vRaKS2tlaRU1NTY+6N+fr6UlNTo5h9aTKZqK2tVeR09uw61dbWYjQau82pqakBsOgtCtFXvmOs/xXSqcRtYessUJmsIkTP9Plv5+jRo3n00UctusjffPMNkyZNAiAyMhIXFxdycnLMr1dUVFBcXGx+pjdt2jQMBgP5+fnmnPz8fBobGxU5xcXFimUQOTk5uLq6EhkZac7Jy8ujublZkePv709wcHBfmyuEgtojtP5YPWPrMzg5L0+InulR4TMYDBQWFlJYWEhHRwfl5eUUFhZSVlYGwJo1a/j000/57W9/y5UrV/jd737HoUOHWLFiBQCenp68+OKLbNmyhdzcXM6fP8/KlSuJiIhgzpw5AOj1eubOncvatWs5c+YM+fn5rF27lvnz55u739HR0YSFhbFq1SrOnz9Pbm4uW7ZsYenSpXh4eAAQFxeHVqslMTGRixcvkpmZyc6dO0lMTJQZnaLfVavsinJdJW6rckM7p6tbOV/bxunqVsoN7aq5MllFiJ5xqquru+/vpsePH+epp56yiP/kJz9h7969AHz44Ye8/fbbVFRUEBISwv/9v/+XuLg4c25zczObN28mIyOD5uZmZs+ezVtvvaWYoXnz5k2Sk5P54osvAIiNjeXNN9/Ey8vLnFNWVkZSUhJff/01Y8aMIS4ujtTUVFxd7x49XVRURFJSEufOncPLy4tly5aRnJw8bAqfI42lj8S22nKCwvfTr3HVYFnkgsZpKHzBr0/X0XXTabizRlBt0+nBNBLva284SjvBsdrao8In+pcj/YANh7baUsisnaDwoLtGtec09z+rOWtlcsvf61z470W+fbrugSyqfTUc7utgcJR2gmO1VY4lEnattKGNJ7+oUSzqzqtq4fNYH6uFrLvdUqwtPH/Q3dlq4Zvsbv2vli1FWG1z6Zv9NIwqhKOSTaqFXUs5Xa8oegDljR2knLZ+VM+3Ddafof1VJW7LlmW27r2ptrm0UQ6KFaJPpPAJu3bmuuW6tu7i1U0dVuNVKnFbTjiwde/NB8dZL3AhKnEhRM9I4RN2Tq1IWI/rxliP+6rEbTnhwNbTEMInuFqNh6nEhRA9I4VP2LW/87H+/EwtHuJhPf6gStyWReO2LjCXdXlCDAwpfGLEseVw1u0zPAkcq+ytBY51YvsMT6v5thYbW/I3Pepu9VrU3lvW5QkxMGRWpxhRrC03OHu9VbUgBLu78PlCHannGrh224jffWZSdhab3uRfqTUQ4j2u23ycnABTlz+rs+UYIyFEz0jhEyOKrcsN7tXTBau9Ljb3+Qap5xqszjDtybULIfqPFD4xotg6QcTWHqKtShvamP95DdeaOgANf7rVxPHKFo49ablO0NZrF0IMDHnGJ0YUWyeI2LqEAGx7hvjqybq/Fb27rjV18OrJuj5fuxBiYEiPT4womx51J6+qRTFkGOg2SnWCyED3EPOqrK8HtBbf9Kg7Z6+3WmyHJrM0hRhc0uMTI05be0e3f77XQPcQjSrP9TqsxGWWphDDg/T4xLDQ0z0sN5yqp6pFGatquRP/+Ec+Fvm29rJs3bJMN8aJyibLKuejsuBdZmkKMfSk8IkhZ8vw4tka68/b1OK2Lk/4rtH6EGiFSvzAE+N56ugN7u1zjvpbXAgxPEnhE0POtiUKamsG1NcS2NLLalIZNlWLP+6v5ciCCbx8op4bTe1M0Dqz9x88h/y8PCGEOil8YshduWW9t/atlfjf60aTVdZiNd4fnLouMFfErXvcX0vhC1qHOs9MiJFMJreIIXe92XpvrdpKfNt0TwLdlD+2gW6j2Dbd+hZkYNvyBFeVvxFqcSHEyCM9PjEgTlY2/W34T8uEgmvdDv95jXbiqkq8q2B3Fz6P9enxM7vShjaezLpO+e27RTTvWjOfL9RZ/RrvMaOobLJ8nuczRiqfEPZCCp/odycrm3j62I2/TfV3wmAw8vSxG2TOn2C1+FU2Wp8xqRa35ZndhlP1iqIHUH7bpDoLtLbZ+rO8GpW4EGLkkV9jRb976es6i/VtRtOduDX1KiOPanFbnFY5cFYt3qJS39TiQoiRR3p8ot+pnWKuFre22Lu7eE/X/AE0tll/E7W4RmUOi1pcCDHy9KjHd/LkSZYsWUJYWBheXl58+OGHqrmvvvoqXl5e7Nq1SxFvaWlh3bp1hISEEBAQwJIlS6ioqFDk1NXVkZCQQFBQEEFBQSQkJFBXV6fIKSsrIz4+noCAAEJCQli/fj2trcrf3ouKili4cCF+fn6EhYWxY8cOTKae7s0v+spJ5aNWi/tqrf8YWot3rvlLv9LEiWutpF9p4tljtaoTVtQKlrNKXG12aH/NGhVCDL0eFb7GxkbCw8PZvn07Wq36+qTDhw9z7tw5/P39LV5LSUnhyJEjpKWlkZWVRUNDA/Hx8RiNdycSrFixgsLCQtLT08nIyKCwsJCVK1eaXzcajcTHx2MwGMjKyiItLY3MzEw2btxozrl16xaLFy/G19eX7Oxstm/fzq5du9i9e3ePPhDRd+NVdi1Ri++f7WVRoDROd+Jd2bqlmJV5KgCoHYjQm1mjQoiRpUdDnfPmzWPevHkAJCYmWs25evUqGzZs4LPPPiMuLk7xWn19PR988AF79uwhKioKgH379jF16lRyc3OJiYmhuLiYL7/8kqNHjzJ9+nQA3nnnHWJjY83ro7Kzs7l06RIXLlwgMDAQgK1bt7JmzRo2b96Mh4cH6enpNDU1sXfvXrRaLeHh4Vy+fJn33nuP1atXd7seS/SPIHdnqpste2DB7tZ/3B7315I5/84i8PqWDjxdR6nOArV102nrq/LuxK2xddaoEGLk6ZfJLe3t7axYsYKkpCT0er3F6wUFBbS1tREdHW2OBQYGotfrOX36NAD5+fmMGzfOXPQAZsyYgZubmyJHr9ebix5ATEwMLS0tFBQUmHNmzpyp6JnGxMRQWVlJaWlpfzTXIdmyFu5BlQI3WSUOnYvA/Sj9WQCFL/ipLn2wddPpMSpjmmpxuDtr9Eisjv1PTJCiJ4Sd6ZfCt23bNsaPH8/y5cutvl5dXY1Go8Hb21sR1+l0VFdXm3O8vb0VPTInJyd8fHwUOTqdTvEe3t7eaDSabnM6/9yZI2wrZLY+V9v0qDsPuisLUX8dv7Mg0HoRUotv+cFYm+JCCPvX51mdJ06c4KOPPuL48eM2f63JZLIodL3J6RrvmtM5saW7Yc6SkpKeXXQ/Gezvd6+KJidWF7lS3nz395687xrZHdHCA1rLgcHNxS5826AsLN82GEn+qoJ/1lsvfu9MceL9q85cbx2FbnQHq4KaaL3WQMm1vl37htOugGXvbkPeTb7fYfnmuX91ASyLYu5f64hxvd63i7FiKO/rYHOUtjpKO8G+2trd9oF9LnzHjx/n2rVriiFOo9HI66+/zt69e7l48SK+vr4YjUZqa2vx8bm7aLimpoZZs2YB4OvrS01NjaLQmUwmamtrzT02X19f87Bnp9raWoxGoyKna8+upqYGwKIneK/B3GNxqPd0fPOrG5Q3Nyli5c2j+PDmBPZ/33JheE1xNWBZ4GoZS2ior9XvEQrM+X7/t/XmyQqr8bp2jdXv05tr762hvq+DyVHa6ijtBMdqa5+HOlesWMHJkyc5fvy4+T9/f38SExM5fPgwAJGRkbi4uJCTk2P+uoqKCoqLi83P9KZNm4bBYCA/P9+ck5+fT2NjoyKnuLhYsQwiJycHV1dXIiMjzTl5eXk0Nzcrcvz9/QkODu5rc+2CrRNEKgzW4+Uq8YGktjxhlEpcbe1glUpcCGH/etTjMxgMXLlyBYCOjg7Ky8spLCxk/PjxTJo0yaIn5ezszMSJE82/PXh6evLiiy+yZcsWdDod48ePZ+PGjURERDBnzhwA9Ho9c+fOZe3atbz77ruYTCbWrl3L/Pnzze8THR1NWFgYq1atIjU1lZs3b7JlyxaWLl2Kh4cHAHFxcezYsYPExESSkpL45ptv2LlzJ+vXr5cZnX9j6wSRZqP1IqEWH0iRPs6crrbcyizSx/qPsm6ME1cNlnFflaUVQgj716Me35///Gdmz57N7NmzaWpqYtu2bcyePZs33nijx9/ojTfeYNGiRSxbtowFCxbg5ubGJ598gkZz9x/b/fv388gjj/Dcc8/x/PPP88gjj7Bv3z7z6xqNhoMHDzJ27FgWLFjAsmXLWLRoEampqeYcT09PPv30UyorK4mKimLdunW88sorrF69usfXau82Pepuda1af0w+GWgTRlsvzmrxEA/rk14eVIkLIexfj3p8P/zhDy12UOnOhQsXLGJjxozh17/+Nb/+9a9Vv278+PH827/9W7fvPWnSJA4ePNhtTkREBF988UXPLtZBNbV2dPvne2mdR3HTyuta5/7b6rXzNIe6lg68ulnH19BuffsXg0p806PunL3eqlj03l8zTIUQI5NsUu2A1hy/SW2X+R61bXfi1gS4We9NPaASt9XJyiaeOXaDqwYjt9pMXDUYeebYDU5WNlnk2jpMG+zuwmfzvXkhRMsP/UbzQoiWz+Z7y9o8IRyYbFLtgP5YbX0Jglr8QXdnzl63fK27BemdG0lfqXEl5Lsb3e5+8vKJerp22NpNd+KFLyh7fb3pwdlyjJEQwv5J4RvGbDmFwJZclYMJVOO2FpvOBe938jX86VYTZ6+3qva06lTO/Km3Eu/swcmWYkKI3pLCN0wpi8cdasWjtKGN+Z/XcO2eKfrHK1s49qRPvxQEW4tNdxtJW+t5jdHALSudTVeVkVTpwQkh+kKe8Q1TtpxC8OrJOkXRA7jW1MGrJ+v6/bp6crjTFWtVDPhWJa52ZJQcJSWEGAjS4xumbFlkfvKa9dPE1eKuo6yfKO6q8muQLb1PgOvN1gtWtUq8pcP6GQqtHbLWTgjR/6THN0zZMnvR1md2oZ7W33uKStzWM/DUDpadqBL3Uqm4nmqVWAgh+kD+ZRmmNj3qTuBYZY8ncKxTv6w/Cxtv/TTx76nEbd3izHeM9R8rnUp87z94WpyI7ux0Jy6EEP1NCt9w1nWLtX7acs3WY4NsXTt3u836LE21+OP+Wg7Pn0DQOA2eLk4EjdNweP4E1TP5hBCiL+QZ3zCVeq6B8kZloShv7LA6M7I3p4zbMktz06PuHK9sUUyg8dOqb3FWVGe5l2Z3ceg8iFYKnRBi4EnhG0S2LOq2ZXjR2cn687xuDhm3aUlAuaHd4jSDqqYOyg3tKtdvaykWQojBI0Odg+TeU8z/dEtz31PMPVysFwl3K/HH/aw/m1OL22pZbp1FGTP9LW7N3/lYL+ZqcSGEGExS+AaJrTMj1ZawWYu/+7gXfl1mTPppR/Hu4169uVQL1c3Wn82pxVc/4mbxgzXqb3EhhBhqMtQ5SL5tsP58668qcVtOIQh2d+HYkz42beNlyxZntvrt5Sa6lsSOv8VlwooQYqhJ4esDW4rHd40qp5urxHtzCkFPn9mVNrQx7z+rqbp7SD1ff9fEfy3y7ZfiZ+vyByGEGEwy1NlL9z6zO3Gt9b7P7Awq592pxW1dcmCLV0/cVBQ9gKrmO3Fr1NaRq8VtLdpCCDGYpPD1kq3P7FQeh6nGB/IcuRNV1ouzWnziGOvv46cSH8iiLYQQfSVDnb1k60bMHSqTVdTiMHCnEKg8PlSNV7dYj1epxO9dJ3il1kCI9zg5OkgIMWxI4eslWzdidlJZ2tZPm7EMqHaVXqlaHO4W7ZKSWkJDgwbmwoQQohdkqLOXvEZbr1hqcYtpjveLDyOjVJqkFhdCiOFMeny9dL3JesVSi7u7OnGjxbLL5+7aP9VjIJcneLrAdSvDmp4ycimEGIGk8PVSo8o4n1p8um40X5RbVo/pur7vrlLa0MaczGpu3nP83pflTeQ+bX15gsYJjFaGXTUqNTjI3ZnrLZbrDYPd5cdHCDHy9Gio8+TJkyxZsoSwsDC8vLz48MMPza+1tbXx+uuvM2vWLAICAtDr9axYsYKysjLFe7S0tLBu3TpCQkIICAhgyZIlVFRUKHLq6upISEggKCiIoKAgEhISqKurU+SUlZURHx9PQEAAISEhrF+/ntZW5YGrRUVFLFy4ED8/P8LCwtixY0e/n+bdorIkTS2+fYan1WOGts/o+9E7S7NrFEUP4Gbrnbg1D46z/j4hanEP6127B1XiQggxnPWo8DU2NhIeHs727dvRapU7b9y+fZvz58+TlJTEV199xUcffURFRQVxcXG0t9/tJaSkpHDkyBHS0tLIysqioaGB+Ph4jMa7lWLFihUUFhaSnp5ORkYGhYWFrFy50vy60WgkPj4eg8FAVlYWaWlpZGZmsnHjRnPOrVu3WLx4Mb6+vmRnZ7N9+3Z27drF7t27e/0hWeOmspemWjzY3YXPF+p4IUTLY55GXgjR8vlC3X13V3npqxss+uI6L311Q3WN4Pkb1nuZavEf6KzvnhKpEpflCUIIe9Kjsap58+Yxb948ABITExWveXp68tlnnyli77zzDjNmzKC4uJiIiAjq6+v54IMP2LNnD1FRUQDs27ePqVOnkpubS0xMDMXFxXz55ZccPXqU6dOnm98nNjaWkpISQkNDyc7O5tKlS1y4cIHAwEAAtm7dypo1a9i8eTMeHh6kp6fT1NTE3r170Wq1hIeHc/nyZd577z1Wr16NUz9No+zN0KUtMx1LG9p4Mus65bfv9lTzrjXft1j2xKZH3Tl7vVWxDrG7QmbrMUZCCDGcDciszoaGO4u4vby8ACgoKKCtrY3o6GhzTmBgIHq9ntOnTwOQn5/PuHHjzEUPYMaMGbi5uSly9Hq9uegBxMTE0NLSQkFBgTln5syZip5pTEwMlZWVlJaW9lsbezN02dmDW1Xo2m0PDmDDqXpF0QMov21iw6n6vl04vVsc31m0j8Tq2P/EBCl6QogRq99nJ7S2trJp0yYWLFjAAw88AEB1dTUajQZvb29Frk6no7q62pzj7e2t6JE5OTnh4+OjyNHpdIr38Pb2RqPRKHICAgIsvk/na5MnT7Z63SUlJTa3dXeYE+9fdeZ66yh0oztYFdRO67W/UnLNMreiyYmVF1ypah0FaPjTrSa+Lm9k39QWHtBaPn88fW0M1n4vOV3VZOVaredCR7ftWn/Px9R6rdbqdfeH3ny2I5W01f44SjvBvtoaGhqq+lq/Fr729nYSEhKor6/n448/vm++yWSyKHS9yeka75rTObGlu2HO7j4k1a8B5ny/Z7lb/ruGqlbl0GhV6yj2VXnx8Y98LPI1Z7+zupWKRqOxvNYTFRZ5d4zqVbv6U+cwtSOQttofR2knOFZb+22os729neXLl1NUVMThw4eZMOHuVlu+vr4YjUZqa2sVX1NTU2Pujfn6+lJTU6OYfWkymaitrVXkdPbsOtXW1mI0GrvNqam5M7uxa29xMJ2tsT6sqRaPGG99KNFaXO0myu4EQghhqV/+bWxra2PZsmUUFRVx5MgRJk6cqHg9MjISFxcXcnJyzLGKigqKi4vNz/SmTZuGwWAgPz/fnJOfn09jY6Mip7i4WLEMIicnB1dXVyIjI805eXl5NDc3K3L8/f0JDg7uj+b2ilFlU06jyjKLm1bWzanF1R63yWM4IYSw1KPCZzAYKCwspLCwkI6ODsrLyyksLKSsrIz29nZ+/vOfc/bsWQ4cOICTkxNVVVVUVVXR1NQE3Jn5+eKLL7JlyxZyc3M5f/48K1euJCIigjlz5gCg1+uZO3cua9eu5cyZM+Tn57N27Vrmz59v7n5HR0cTFhbGqlWrOH/+PLm5uWzZsoWlS5fi4eEBQFxcHFqtlsTERC5evEhmZiY7d+4kMTGx32Z09oaryupwV5V9v2xZovC4n6vVXLW4EEI4sh4Vvj//+c/Mnj2b2bNn09TUxLZt25g9ezZvvPEGFRUVZGVlUVlZyZw5c9Dr9eb/Dh06ZH6PN954g0WLFrFs2TIWLFiAm5sbn3zyCRrN3fVh+/fv55FHHuG5557j+eef55FHHmHfvn3m1zUaDQcPHmTs2LEsWLCAZcuWsWjRIlJTU805np6efPrpp1RWVhIVFcW6det45ZVXWL16dX98Xr0W4Gb9o35AJW6LbdM9CezyPoFuo9g2ve+L44UQwt441dXV9e+WJsKql766QfqVJov4CyFaq0cPef272oQVqFv2gEWsc6/O4bbOzpEemEtb7Y+jtBMcq62y2eIg+ccpWg5926TYI1PjdCfeHwbq7D4hhLA3MvGvD3q6pRjAnqJGi42hjaY7cWvU+mpD34cTQoiRTXp8vVTa0MaTX9RQ3nh3skleVQufx/pYHWI8VdVqEesuPmuiC19VWRbSWROl9AkhRF9Ij6+XUk7XK4oeQHljBymnrW8p1mhlMXp38f/vh+OZ2GVS5kTXO3EhhBC9Jz2+Xjp5zcrJrMAfVeJjnaHFSudurModCHZ34b+e8h2WE1aEEGIkk8LXS/Uqj/PqVOL/Z4ILudcsX/w/E+6/MbQQQoj+I0Odg2Ssi/WPWi0uhBBiYMi/ur2ktgeMWvxWm/VneQ0qcSGEEANDCl8veaiMUKrF/cdqrMb9VOJCCCEGhhS+XopUeTanFt/0qDsPuiuLXHenngshhBgYUvh6SW2/a7X4vaeeP+Zp7NGp50IIIfqfzOrspaI6o01xuDtLs6SkltDQoIG6NCGEEN2QHl+vqU1KkckqQggxnEnh66W/1422KS6EEGJ4kMLXS3IGnhBCjEzyjK+Xgt1d+DzWR7YUE0KIEUYKXx/IlmJCCDHyyFCnEEIIhyKFTwghhEORwieEEMKhSOETQgjhUJzq6upkxbUQQgiHIT0+IYQQDkUKnxBCCIcihU8IIYRDkcInhBDCoUjhE0II4VCk8A2At99+m6ioKCZNmsRDDz1EfHw8Fy9eVOSYTCa2bdvG9773Pfz8/HjyySe5dOnSEF1x7/WkrS+//DJeXl6K/+bOnTtEV9x7+/fvZ9asWUyaNIlJkybxox/9iGPHjplft5d7Cvdvq73c067eeustvLy8WLdunTlmT/f1Xtbaaq/3tSspfAPgxIkTLF++nGPHjpGZmYmzszPPPvssN2/eNOe8++677Nmzhx07dpCdnY1Op2Px4sU0NDQM4ZXbridtBZgzZw7FxcXm/9LT04foinsvICCArVu38tVXX5GTk8Ps2bP56U9/yl/+8hfAfu4p3L+tYB/39F5nzpzhd7/7HREREYq4Pd3XTmptBfu7r9ZI4RsAhw4d4mc/+xnh4eFERESwb98+ampqOHXqFHDnN8i9e/fyi1/8gmeeeYbw8HD27t2LwWAgIyNjiK/eNvdraydXV1cmTpxo/m/8+PFDdMW99+STT/KjH/2IkJAQHn74YTZv3sy4ceM4c+aMXd1T6L6tnezhnnaqr6/npZdeYteuXXh5eZnj9nZfQb2tnezpvqqRwjcIDAYDHR0d5h+y0tJSqqqqiI6ONudotVpmzZrF6dOnh+gq+0fXtnbKy8vj4Ycf5rHHHmPNmjVcv359aC6wnxiNRv7whz/Q2NjItGnT7Pqedm1rJ3u6p52F7YknnlDE7fG+qrW1kz3dVzVyLNEg2LBhA1OnTjX/o1FVVQWATqdT5Ol0OiorKwf9+vpT17YCzJ07l6eeeorg4GCuXr1KamoqTz/9NLm5ubi6ug7h1dquqKiIefPm0dzcjJubG7///e+JiIgw/yNoT/dUra1gX/f0d7/7HVeuXGHfvn0Wr9nb39Xu2gr2dV+7I4VvgL322mucOnWKo0ePotFoFK85OTkp/mwymSxiI4laW59//nnz/0dERBAZGcnUqVM5duwYTz/99FBcaq+FhoZy/Phx6uvryczM5OWXX+Y///M/za/b0z1Va2t4eLjd3NOSkhJ+9atf8cUXXzB69GjVPHu4rz1pq73c1/uRwjeAUlJSOHToEEeOHGHy5Mnm+MSJEwGorq4mMDDQHK+pqbH4zXKkUGurNf7+/gQEBHDlypXBubh+NHr0aEJCQgD4wQ9+wLlz53jvvfdISkoC7OueqrV19+7dFrkj9Z7m5+dTW1vLzJkzzTGj0cgf//hHfvOb35ifVdvDfb1fW7/77juLXt1Iva/3I8/4BkhycjIZGRlkZmYyZcoUxWvBwcFMnDiRnJwcc6y5uZm8vDymT58+2JfaZ9211Zra2loqKyvNvwCMZB0dHbS2ttrdPbWms63WjNR7+uSTT/LHP/6R48ePm//7wQ9+wPPPP8/x48d5+OGH7ea+3q+t1nqBI/W+3o/0+AZAUlISBw8e5Pe//z1eXl7m5wRubm6MGzcOJycnXn75Zd566y1CQ0N5+OGH+dd//Vfc3NyIi4sb4qu3zf3aajAY2L59O08//TQTJ07k6tWr/OpXv0Kn07Fo0aIhvnrb/PKXv2TevHk88MAD5ll9J06c4D/+4z/s6p5C9221p3vauVbtXmPHjmX8+PGEh4cD2M19vV9b7em+3o8UvgFw4MABAJ555hlFPDk5mZSUFABeffVVmpqaWLduHXV1dTz22GMcOnQId3f3Qb/evrhfWzUaDRcvXuSTTz6hvr6eiRMn8sMf/pB///d/H3FtraqqIiEhgerqajw8PIiIiCAjI4OYmBjAfu4pdN/WpqYmu7mnPWFP97U79vR39X7kPD4hhBAORZ7xCSGEcChS+IQQQjgUKXxCCCEcihQ+IYQQDkUKnxBCCIcihU8IIYRDkcInhBDCoUjhE0II4VCk8AkhhHAo/z/n7Sl3T5FbTwAAAABJRU5ErkJggg==\n",
      "text/plain": [
       "<Figure size 432x288 with 1 Axes>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "plt.scatter(earnings.age, earnings.earnings)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 41,
   "id": "100a985c",
   "metadata": {},
   "outputs": [],
   "source": [
    "earnings.to_csv('data/ml_earnings.csv', index=False)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "567bbbbb",
   "metadata": {},
   "source": [
    "### Post-training earnings data (enhanced)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 139,
   "id": "2d26efeb",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Train set large\n",
    "SAMPLE_SIZE = 5000\n",
    "MAX_AGE = 50\n",
    "\n",
    "age = stats.halfnorm.rvs(loc=19, scale=10, size=SAMPLE_SIZE).astype(int)\n",
    "age = np.where(age > MAX_AGE, np.random.choice(np.arange(20, MAX_AGE)), age)\n",
    "\n",
    "took_a_course = stats.bernoulli(p=10/age).rvs().astype(bool)\n",
    "python_proficiency = np.random.uniform(0, 1, SAMPLE_SIZE)\n",
    "\n",
    "noise = np.random.randn(SAMPLE_SIZE)\n",
    "\n",
    "earnings = 75000 + took_a_course * 10000 + took_a_course * python_proficiency * 5000 + age * 1000 + age**2 * 50 + noise * 2000\n",
    "earnings = earnings.round()\n",
    "\n",
    "earnings = pd.DataFrame(dict(\n",
    "    age=age,\n",
    "    python_proficiency = python_proficiency,\n",
    "    took_a_course=took_a_course,\n",
    "    earnings=earnings\n",
    "))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 140,
   "id": "99788d91",
   "metadata": {},
   "outputs": [],
   "source": [
    "earnings.to_csv('data/ml_earnings_interaction_train.csv', index=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 143,
   "id": "555e6273",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Test set\n",
    "SAMPLE_SIZE = 100\n",
    "MAX_AGE = 50\n",
    "\n",
    "age = stats.halfnorm.rvs(loc=19, scale=10, size=SAMPLE_SIZE).astype(int)\n",
    "age = np.where(age > MAX_AGE, np.random.choice(np.arange(20, MAX_AGE)), age)\n",
    "\n",
    "python_proficiency = np.random.uniform(0, 1, SAMPLE_SIZE)\n",
    "\n",
    "noise = np.random.randn(SAMPLE_SIZE)\n",
    "\n",
    "earnings_0 = (75000 + 0 * 10000 + 0 * python_proficiency * 5000 + age * 5000 + age**2 * 50 + noise * 2000).round()\n",
    "earnings_1 = (75000 + 1 * 10000 + 1 * python_proficiency * 5000 + age * 5000 + age**2 * 50 + noise * 2000).round()\n",
    "true_effect = earnings_1 - earnings_0\n",
    "\n",
    "earnings_test = pd.DataFrame(dict(\n",
    "    age=age,\n",
    "    python_proficiency=python_proficiency,\n",
    "    took_a_course=True,\n",
    "    true_effect=true_effect\n",
    "))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 144,
   "id": "2763703c",
   "metadata": {},
   "outputs": [],
   "source": [
    "earnings_test.to_csv('data/ml_earnings_interaction_test.csv', index=False)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "74afa358",
   "metadata": {},
   "source": [
    "## Chapter 11"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "283685f2",
   "metadata": {},
   "source": [
    "### Simulated data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 467,
   "id": "9db54f09",
   "metadata": {},
   "outputs": [],
   "source": [
    "SAMPLE_SIZE = 2000\n",
    "\n",
    "def get_data_dgp_11(sample_size=1000, with_interaction=True):\n",
    "    chi = np.random.chisquare(12, (sample_size, 5)) / 30\n",
    "    norm = np.random.normal(0, 1, (sample_size, 5))\n",
    "    binom = np.random.binomial(1, [.3, .5, .7, .12, .9], (sample_size, 5))\n",
    "    gumbel = np.random.gumbel(-2, 1, (sample_size, 5)) / 6\n",
    "\n",
    "    X = np.concatenate([chi, norm, binom, gumbel], axis=1)\n",
    "\n",
    "    T0 = np.zeros(sample_size)\n",
    "    T1 = np.ones(sample_size)\n",
    "    \n",
    "    noise = np.random.randn(sample_size)\n",
    "    \n",
    "    coefs = np.random.gumbel(0, 10, X.shape[1] + 1)\n",
    "    \n",
    "    if with_interaction:\n",
    "        interaction_term = X[:, 0] + X[:, 7]\n",
    "        y0 = (coefs * np.concatenate([(T0 * interaction_term).reshape(-1, 1), X], axis=1)).sum(axis=1) + noise\n",
    "        y1 = (coefs * np.concatenate([(T1 * interaction_term).reshape(-1, 1), X], axis=1)).sum(axis=1) + noise\n",
    "    else:\n",
    "        interaction_term = 1\n",
    "        y0 = (coefs * np.concatenate([\n",
    "            T0.reshape(-1, 1), \n",
    "            X], \n",
    "            axis=1))\\\n",
    "        .sum(axis=1) + noise\n",
    "        \n",
    "        y1 = (coefs * np.concatenate([\n",
    "            10 * np.exp(\n",
    "                T1 + X[:, 7:12]\\\n",
    "                .sum(axis=1) / 1000).reshape(-1, 1), \n",
    "            X], \n",
    "            axis=1)).sum(axis=1) + noise\n",
    "    \n",
    "    return X, y0, y1, coefs\n",
    "\n",
    "\n",
    "def filter_outcomes(y, t):\n",
    "    result = np.zeros_like(t, dtype=float)  \n",
    "    result[t == 0] = y[t == 0, 0]      \n",
    "    result[t == 1] = y[t == 1, 1]    \n",
    "    return result"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "51807226",
   "metadata": {},
   "source": [
    "#### No interaction"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 468,
   "id": "08f5832e",
   "metadata": {},
   "outputs": [],
   "source": [
    "# No-interaction data\n",
    "X, y0, y1, coefs = get_data_dgp_11(SAMPLE_SIZE, False)\n",
    "\n",
    "# Generate treatments\n",
    "T = np.random.binomial(1, 0.5, SAMPLE_SIZE)\n",
    "\n",
    "# Train-test split\n",
    "X_train, X_test, y_train, y_test, T_train, T_test = train_test_split(X, np.stack([y0, y1]).T, T, test_size=.1)\n",
    "\n",
    "# Filter actual outcomes\n",
    "y_train_actual = filter_outcomes(y_train, T_train)\n",
    "y_test_actual = filter_outcomes(y_test, T_test)\n",
    "\n",
    "# To DF & update cols\n",
    "data_no_interaction_train = pd.DataFrame(\n",
    "    np.concatenate(\n",
    "        [\n",
    "            X_train, \n",
    "            T_train.reshape(-1, 1), \n",
    "            y_train_actual.reshape(-1, 1)],\n",
    "        axis=1\n",
    "    )\n",
    ")\n",
    "\n",
    "data_no_interaction_train.columns = [f'x{i}' for i in range(20)] + ['treatment', 'outcome']\n",
    "\n",
    "\n",
    "data_no_interaction_test = pd.DataFrame(\n",
    "    np.concatenate(\n",
    "        [\n",
    "            X_test, \n",
    "            T_test.reshape(-1, 1), \n",
    "            (y_test[:, 1] - y_test[:, 0]).reshape(-1, 1)],\n",
    "        axis=1\n",
    "    )\n",
    ")\n",
    "\n",
    "\n",
    "data_no_interaction_test.columns = [f'x{i}' for i in range(20)] + ['treatment', 'true_effect']\n",
    "\n",
    "# Store\n",
    "data_no_interaction_train.to_csv('data/data_11_no_interaction_train.csv', index=False)\n",
    "data_no_interaction_test.to_csv('data/data_11_no_interaction_test.csv', index=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 469,
   "id": "e87ab637",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(array([ 34.89751834,   8.61226375, -12.63565299, ...,   2.99937547,\n",
       "         14.48281498,  32.13379007]),\n",
       " array([ -9.92622448, -36.0636879 , -57.4257863 , ..., -41.66052399,\n",
       "        -30.30810687, -12.62264189]))"
      ]
     },
     "execution_count": 469,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "y0, y1"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 470,
   "id": "9ad1f4f9",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([-44.82374282, -44.67595165, -44.79013331, ..., -44.65989946,\n",
       "       -44.79092185, -44.75643196])"
      ]
     },
     "execution_count": 470,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "y1 - y0"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "31ce7420",
   "metadata": {},
   "source": [
    "#### With interaction"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 475,
   "id": "71909b39",
   "metadata": {},
   "outputs": [],
   "source": [
    "# No-interaction data\n",
    "X, y0, y1, coefs = get_data_dgp_11(SAMPLE_SIZE, True)\n",
    "\n",
    "# Generate treatments\n",
    "T = np.random.binomial(1, 0.5, SAMPLE_SIZE)\n",
    "\n",
    "# Train-test split\n",
    "X_train, X_test, y_train, y_test, T_train, T_test = train_test_split(X, np.stack([y0, y1]).T, T, test_size=.1)\n",
    "\n",
    "# Filter actual outcomes\n",
    "y_train_actual = filter_outcomes(y_train, T_train)\n",
    "y_test_actual = filter_outcomes(y_test, T_test)\n",
    "\n",
    "# To DF & update cols\n",
    "data_with_interaction_train = pd.DataFrame(\n",
    "    np.concatenate(\n",
    "        [\n",
    "            X_train, \n",
    "            T_train.reshape(-1, 1), \n",
    "            y_train_actual.reshape(-1, 1)],\n",
    "        axis=1\n",
    "    )\n",
    ")\n",
    "\n",
    "data_with_interaction_train.columns = [f'x{i}' for i in range(20)] + ['treatment', 'outcome']\n",
    "\n",
    "\n",
    "data_with_interaction_test = pd.DataFrame(\n",
    "    np.concatenate(\n",
    "        [\n",
    "            X_test, \n",
    "            T_test.reshape(-1, 1), \n",
    "            (y_test[:, 1] - y_test[:, 0]).reshape(-1, 1)],\n",
    "        axis=1\n",
    "    )\n",
    ")\n",
    "\n",
    "\n",
    "data_with_interaction_test.columns = [f'x{i}' for i in range(20)] + ['treatment', 'true_effect']\n",
    "\n",
    "# Store\n",
    "data_with_interaction_train.to_csv('data/data_11_with_interaction_train.csv', index=False)\n",
    "data_with_interaction_test.to_csv('data/data_11_with_interaction_test.csv', index=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 474,
   "id": "72d62da4",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([[ 1.63371979e+01,  1.59045881e+01],\n",
       "       [-3.06609145e+00, -1.84422244e+00],\n",
       "       [-1.03359389e+01, -7.87196605e+00],\n",
       "       [ 4.46875521e+00,  4.96359667e+00],\n",
       "       [ 3.18576088e+01,  3.37932190e+01],\n",
       "       [ 1.19932029e+01,  2.98884169e+00],\n",
       "       [ 9.69525909e+00,  7.93796477e+00],\n",
       "       [-7.38440149e+00, -8.60629373e+00],\n",
       "       [ 2.41254919e+01,  1.35643255e+01],\n",
       "       [ 1.97529380e+01,  1.63711534e+01],\n",
       "       [ 2.43068304e+01,  2.23882172e+01],\n",
       "       [-7.98322104e+00, -1.16848962e+01],\n",
       "       [-2.54805435e+00, -9.10154055e+00],\n",
       "       [-2.12343129e+01, -2.11751544e+01],\n",
       "       [ 1.99864813e+00, -3.96052447e+00],\n",
       "       [ 1.54031350e+01,  1.24315489e+01],\n",
       "       [ 7.02887405e+00,  5.32079794e+00],\n",
       "       [ 2.07225787e+00,  3.98963467e+00],\n",
       "       [ 1.05768464e+01,  1.16281929e+01],\n",
       "       [ 2.58577213e+01,  2.11922848e+01],\n",
       "       [ 1.11960625e+01,  6.92346165e+00],\n",
       "       [ 8.63504905e+00,  7.25811536e+00],\n",
       "       [ 1.45579537e+01,  1.14446180e+01],\n",
       "       [-1.05771942e+01, -1.20518737e+01],\n",
       "       [ 1.50904679e+01,  1.10366070e+01],\n",
       "       [ 1.18356618e+01,  4.89932210e+00],\n",
       "       [ 1.18242024e+01,  9.95327571e+00],\n",
       "       [-4.42150929e+00, -8.56758507e-01],\n",
       "       [ 1.90045567e+01,  2.25398094e+01],\n",
       "       [-9.05976959e+00, -1.25219855e+01],\n",
       "       [ 1.83118169e+01,  1.50464956e+01],\n",
       "       [-9.71122700e+00, -9.93741894e+00],\n",
       "       [-4.60208952e+00,  2.71032818e+00],\n",
       "       [-2.15014674e+01, -2.04324464e+01],\n",
       "       [ 1.31560593e+01,  1.00683814e+01],\n",
       "       [-1.79174245e+01, -1.94410151e+01],\n",
       "       [ 1.93439684e+01,  2.07561603e+01],\n",
       "       [-1.58168095e+01, -1.03663941e+01],\n",
       "       [-7.75661603e+00, -5.33883483e+00],\n",
       "       [ 3.22820694e+01,  2.83894343e+01],\n",
       "       [-2.01817097e+01, -1.76334410e+01],\n",
       "       [ 3.17113259e+01,  2.81813370e+01],\n",
       "       [-1.09985805e+01, -7.91712686e+00],\n",
       "       [ 2.13866154e+01,  1.66148733e+01],\n",
       "       [-1.48789697e+01, -8.17279022e+00],\n",
       "       [ 9.17563124e+00,  6.02526627e+00],\n",
       "       [ 2.58140460e+00, -1.12501476e-01],\n",
       "       [-1.21727684e+01, -1.07267230e+01],\n",
       "       [-1.90409191e+01, -1.65927276e+01],\n",
       "       [ 3.72328902e+01,  3.79919913e+01],\n",
       "       [-9.47953164e-01,  1.21423457e+00],\n",
       "       [-1.45674899e+00, -1.87900795e+00],\n",
       "       [-1.53365385e+01, -1.20907620e+01],\n",
       "       [ 1.32460485e+01,  8.05235665e+00],\n",
       "       [ 3.63131086e+00,  4.44464150e+00],\n",
       "       [-3.49429150e+00, -4.56076985e+00],\n",
       "       [ 3.10834849e+00, -8.10915916e-01],\n",
       "       [ 2.77119072e+01,  2.41466627e+01],\n",
       "       [-1.70212069e+01, -1.68258705e+01],\n",
       "       [ 5.04745524e+01,  4.66274134e+01],\n",
       "       [ 1.48456545e+00,  2.31260440e+00],\n",
       "       [-1.18330778e+01, -8.95333768e+00],\n",
       "       [ 2.81931054e+01,  2.69260371e+01],\n",
       "       [-4.21986388e+00, -1.41826508e+00],\n",
       "       [ 1.46745912e+01,  1.20520962e+01],\n",
       "       [-1.11813974e+01, -1.13292031e+01],\n",
       "       [-6.02535429e+00, -5.27047080e+00],\n",
       "       [ 3.07129641e+01,  2.91119306e+01],\n",
       "       [ 5.46496851e+00,  4.28464672e-02],\n",
       "       [ 2.59619746e+01,  2.00919997e+01],\n",
       "       [ 3.49669991e+00,  1.86626270e-01],\n",
       "       [ 1.47507462e+01,  1.03204859e+01],\n",
       "       [ 3.67698077e+01,  3.48365664e+01],\n",
       "       [ 5.50672610e+00,  4.63873391e+00],\n",
       "       [ 2.72345918e+01,  1.94359379e+01],\n",
       "       [ 1.89310805e+01,  1.55254734e+01],\n",
       "       [ 1.24083762e+01,  6.71134780e+00],\n",
       "       [-6.70397101e+00, -2.09137370e-01],\n",
       "       [-2.06016817e+01, -1.86689633e+01],\n",
       "       [-1.27942259e+01, -9.91056303e+00],\n",
       "       [ 1.77919844e+01,  1.51883790e+01],\n",
       "       [ 9.71692034e+00,  4.06146703e+00],\n",
       "       [-2.39694407e+00, -1.79774750e+00],\n",
       "       [ 1.05421436e+01,  9.23487406e+00],\n",
       "       [ 1.22045593e+00, -1.07875268e+00],\n",
       "       [-1.07546228e+01, -4.74439361e+00],\n",
       "       [-4.81063247e+00, -8.78817245e+00],\n",
       "       [ 2.31450768e+01,  1.91748792e+01],\n",
       "       [-4.18048254e-02,  4.66992569e+00],\n",
       "       [-4.84710941e+01, -4.25253861e+01],\n",
       "       [ 1.08864775e+01,  9.17476425e+00],\n",
       "       [-3.27788678e+01, -2.98826665e+01],\n",
       "       [-3.54064472e+00, -4.09085475e+00],\n",
       "       [ 1.12191320e+01,  1.10812952e+01],\n",
       "       [ 1.43260242e+01,  1.37793678e+01],\n",
       "       [ 6.55637768e+00,  5.68611171e+00],\n",
       "       [-1.11058627e+01, -6.48561277e+00],\n",
       "       [ 7.85651786e+00,  8.00218141e+00],\n",
       "       [ 4.46217704e+00,  1.14376013e+00],\n",
       "       [ 3.29265255e+01,  3.49590882e+01],\n",
       "       [ 4.85949912e+01,  4.11315565e+01],\n",
       "       [ 1.19354415e+01,  9.37292040e+00],\n",
       "       [ 3.73828496e+01,  2.93836980e+01],\n",
       "       [ 3.30241330e+01,  3.16625772e+01],\n",
       "       [ 1.03462408e+01, -2.98447923e-02],\n",
       "       [ 1.71716305e+01,  1.27985762e+01],\n",
       "       [ 2.39301432e+01,  1.67916835e+01],\n",
       "       [-6.80227926e+00, -4.66061176e+00],\n",
       "       [ 1.11362317e+01,  9.57382673e+00],\n",
       "       [-1.40885447e+01, -1.69857739e+01],\n",
       "       [ 7.84001035e+00, -1.13391887e+00],\n",
       "       [ 1.29158828e+01,  1.19344714e+01],\n",
       "       [-1.06241773e+01, -1.44117050e+01],\n",
       "       [ 3.34716804e+01,  2.83302128e+01],\n",
       "       [-1.55024300e+01, -4.42891211e+00],\n",
       "       [-3.48955183e+01, -3.03897814e+01],\n",
       "       [ 4.33012619e+00, -1.58450917e+00],\n",
       "       [-6.73299617e+00, -3.80308113e+00],\n",
       "       [-2.35171223e+00, -3.40258619e+00],\n",
       "       [ 2.16014984e+01,  2.05111530e+01],\n",
       "       [-1.14591606e+01, -7.76235530e+00],\n",
       "       [-7.04712496e+00, -6.03325693e+00],\n",
       "       [-9.36561770e+00, -1.05784975e+01],\n",
       "       [ 1.51976132e+01,  1.75157483e+01],\n",
       "       [-5.27283770e+00, -7.32338430e+00],\n",
       "       [ 8.84887775e+00,  2.55961507e+00],\n",
       "       [ 2.90433598e+01,  3.13938059e+01],\n",
       "       [-4.87162731e+00, -1.53659872e+00],\n",
       "       [ 7.03312019e+00,  1.35123136e+01],\n",
       "       [ 6.21913815e+00,  7.58283931e+00],\n",
       "       [ 1.76485937e+01,  1.86315969e+01],\n",
       "       [ 2.07231446e+01,  1.73708759e+01],\n",
       "       [-1.72123582e+01, -1.38098575e+01],\n",
       "       [ 1.30620030e+01,  9.51586713e+00],\n",
       "       [ 2.05136987e+01,  1.57194550e+01],\n",
       "       [-1.11707578e+01, -1.03694898e+01],\n",
       "       [ 1.24416031e+01,  1.70158000e+01],\n",
       "       [-1.14513667e+01, -1.00988180e+01],\n",
       "       [ 3.96631849e-01,  1.83286619e-01],\n",
       "       [-1.13900816e+01, -1.51945068e+01],\n",
       "       [ 2.78301418e+01,  2.35278376e+01],\n",
       "       [-2.68684473e-01, -2.30994163e+00],\n",
       "       [-5.79886629e+00, -6.99356703e+00],\n",
       "       [ 2.65129618e+01,  2.79829307e+01],\n",
       "       [-3.11016922e+01, -2.83476668e+01],\n",
       "       [-3.29337223e+01, -3.42038410e+01],\n",
       "       [-7.92670029e+00, -8.73191611e+00],\n",
       "       [-2.13186348e+01, -1.76976884e+01],\n",
       "       [ 7.22327839e+00,  9.56145219e+00],\n",
       "       [ 2.51860980e+01,  2.35128088e+01],\n",
       "       [-9.75730761e+00, -8.19911689e+00],\n",
       "       [ 1.87096854e+01,  2.08531999e+01],\n",
       "       [ 2.07770950e+01,  1.73692478e+01],\n",
       "       [ 9.11028756e+00,  1.49462636e+01],\n",
       "       [ 1.68582071e+01,  1.34435252e+01],\n",
       "       [ 2.84208726e+01,  2.80328979e+01],\n",
       "       [-1.52401652e+01, -1.42386803e+01],\n",
       "       [-9.38910802e+00, -9.95319600e+00],\n",
       "       [ 5.15012713e+00,  6.99167391e-01],\n",
       "       [-3.92883724e+01, -3.68248844e+01],\n",
       "       [ 1.87737433e+01,  1.72472071e+01],\n",
       "       [ 3.44181087e+01,  3.48890706e+01],\n",
       "       [-3.26906582e+01, -3.06290054e+01],\n",
       "       [-2.40423520e+01, -1.45808145e+01],\n",
       "       [ 2.13756242e+00,  5.27183576e+00],\n",
       "       [ 4.90009592e+00,  5.61565090e+00],\n",
       "       [ 4.54859419e+01,  3.76886129e+01],\n",
       "       [ 2.31324656e+01,  2.85480940e+01],\n",
       "       [ 3.24232398e+01,  2.99045715e+01],\n",
       "       [ 4.88183641e+00,  3.50497753e+00],\n",
       "       [ 9.65033120e+00,  2.89810642e+00],\n",
       "       [ 3.12306314e+01,  2.94777631e+01],\n",
       "       [ 1.59728878e+01,  1.06858864e+01],\n",
       "       [ 1.20997705e+00,  5.95213548e-01],\n",
       "       [ 1.33539038e+01,  1.75907486e+01],\n",
       "       [ 2.62493437e+00,  6.53876886e-01],\n",
       "       [-7.84884410e-01,  3.13612900e+00],\n",
       "       [-2.36139030e+00, -1.67070765e+00],\n",
       "       [ 3.12325596e+00,  3.24636605e-02],\n",
       "       [ 2.39675710e+01,  2.50891188e+01],\n",
       "       [-3.65625038e+00, -4.68218168e+00],\n",
       "       [ 6.93704714e+00,  8.36365197e+00],\n",
       "       [-1.56590115e+01, -1.38351988e+01],\n",
       "       [-3.86054744e+00, -7.50929234e+00],\n",
       "       [ 3.56766317e+00,  4.16903836e+00],\n",
       "       [ 6.94816559e+00,  4.98694702e-01],\n",
       "       [ 1.98748626e+01,  1.86915632e+01],\n",
       "       [ 9.63694712e+00,  5.44322195e+00],\n",
       "       [ 6.35608252e+00,  7.74252966e+00],\n",
       "       [ 2.29898145e+01,  1.74060334e+01],\n",
       "       [ 2.21190507e+01,  1.57930569e+01],\n",
       "       [-7.53326886e+00, -1.15775530e+01],\n",
       "       [-1.41301319e+01, -1.06411886e+01],\n",
       "       [-1.40032833e+01, -1.28802892e+01],\n",
       "       [ 2.14106292e+01,  1.91283181e+01],\n",
       "       [ 5.16093055e-01,  3.61359753e+00],\n",
       "       [-1.72316863e+00,  3.20182049e+00],\n",
       "       [ 3.89969272e+00,  6.56362203e+00],\n",
       "       [ 2.11949427e+01,  1.98445588e+01],\n",
       "       [ 4.79663284e+00,  7.20981922e+00]])"
      ]
     },
     "execution_count": 474,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "y_test"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "628499f2",
   "metadata": {},
   "source": [
    "### Chapter 11 - NLP data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "f2ac5b5a",
   "metadata": {},
   "outputs": [],
   "source": [
    "df = pd.read_csv('data/manga.csv')  "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "e3f61a77",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Check gender indicator\n",
    "df['gender_indicator'].unique()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "2df35ef5",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Produce female avatar indicator\n",
    "avatar = []\n",
    "\n",
    "for i in df['gender_indicator']:\n",
    "    if i:\n",
    "        avatar.append(np.random.choice([0, 1], p=[.15, .85]))\n",
    "    else:\n",
    "        avatar.append(np.random.choice([0, 1], p=[.97, .03]))\n",
    "        \n",
    "df['female_avatar'] = avatar\n",
    "\n",
    "# Sanity\n",
    "df.groupby('gender_indicator')['female_avatar'].mean()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "d33d6722",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Produce stereotype indicator\n",
    "df['love_indicator'] = df['text'].str.contains('love|roman').astype('int')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "1ba8a129",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Add photo to the post at random\n",
    "df['has_photo'] = np.random.choice([0, 1], size=df.shape[0])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "e2dcbd14",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Produce the outcome\n",
    "noise = np.random.normal(0.01, .05, size=df.shape[0])\n",
    "probas = .8 + 0.1 * df['love_indicator'] * 1.2 * (0.1 + df['has_photo']) - 0.7 * df['female_avatar'] + noise\n",
    "probas = np.clip(probas, 0, 1)\n",
    "\n",
    "upvote = []\n",
    "\n",
    "for p in probas:\n",
    "    upvote_ = np.random.choice([0, 1], p=[1 - p, p])\n",
    "    upvote.append(upvote_)\n",
    "    \n",
    "df['upvote'] = likes"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "02c2eee9",
   "metadata": {},
   "outputs": [],
   "source": [
    "df.drop(['gender_indicator', 'topic', 'love_indicator', 'author'], axis=1).to_csv('data/manga_processed.csv', index=False)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python [conda env:causal_book_py38]",
   "language": "python",
   "name": "conda-env-causal_book_py38-py"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.13"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
